Calibrating Hedonic Pricing Model for Private Highrise Property with GWR Method

Overview

This exercise is a revision of the Hands-on Exercise 4.

Getting Started

First, we need to load the necessary packages.

pacman::p_load(olsrr, corrplot, tidyverse, ggpubr, sf, spdep, GWmodel, tmap, gtsummary)

Now we import the geospatial data.

# Read the MP14_SUBZONE_WEB_PL shapefile layer and reproject it to EPSG:3414.
mpsz <- st_transform(
  st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL"),
  crs = 3414
)
Reading layer `MP14_SUBZONE_WEB_PL' from data source 
  `/Users/sylvia/sylvie-le/ISSS624/In-class_Ex/In-class_Ex4/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21

Now we import the aspatial data. At the same time, we also transform it into SVY21 projected coordinate system.

# Load the condo resale table, build point geometry from the LONGITUDE and
# LATITUDE columns (declared as EPSG:4326), then reproject to EPSG:3414.
condo_resale.sf <- st_transform(
  st_as_sf(
    read_csv("data/aspatial/Condo_resale_2015.csv"),
    coords = c("LONGITUDE", "LATITUDE"),
    crs = 4326
  ),
  crs = 3414
)

Exploratory Data Analysis

EDA using summary

First, we will look at the variables in condo_resale.sf file for their distribution.

# Per-column summary statistics of the resale data.
condo_resale.sf %>% summary()
    POSTCODE      SELLING_PRICE         AREA_SQM          AGE       
 Min.   : 18965   Min.   :  540000   Min.   : 34.0   Min.   : 0.00  
 1st Qu.:259849   1st Qu.: 1100000   1st Qu.:103.0   1st Qu.: 5.00  
 Median :469298   Median : 1383222   Median :121.0   Median :11.00  
 Mean   :440439   Mean   : 1751211   Mean   :136.5   Mean   :12.14  
 3rd Qu.:589486   3rd Qu.: 1950000   3rd Qu.:156.0   3rd Qu.:18.00  
 Max.   :828833   Max.   :18000000   Max.   :619.0   Max.   :37.00  
    PROX_CBD       PROX_CHILDCARE     PROX_ELDERLYCARE  PROX_URA_GROWTH_AREA
 Min.   : 0.3869   Min.   :0.004927   Min.   :0.05451   Min.   :0.2145      
 1st Qu.: 5.5574   1st Qu.:0.174481   1st Qu.:0.61254   1st Qu.:3.1643      
 Median : 9.3567   Median :0.258135   Median :0.94179   Median :4.6186      
 Mean   : 9.3254   Mean   :0.326313   Mean   :1.05351   Mean   :4.5981      
 3rd Qu.:12.6661   3rd Qu.:0.368293   3rd Qu.:1.35122   3rd Qu.:5.7550      
 Max.   :19.1804   Max.   :3.465726   Max.   :3.94916   Max.   :9.1554      
 PROX_HAWKER_MARKET PROX_KINDERGARTEN     PROX_MRT         PROX_PARK      
 Min.   :0.05182    Min.   :0.004927   Min.   :0.05278   Min.   :0.02906  
 1st Qu.:0.55245    1st Qu.:0.276345   1st Qu.:0.34646   1st Qu.:0.26211  
 Median :0.90842    Median :0.413385   Median :0.57430   Median :0.39926  
 Mean   :1.27987    Mean   :0.458903   Mean   :0.67316   Mean   :0.49802  
 3rd Qu.:1.68578    3rd Qu.:0.578474   3rd Qu.:0.84844   3rd Qu.:0.65592  
 Max.   :5.37435    Max.   :2.229045   Max.   :3.48037   Max.   :2.16105  
 PROX_PRIMARY_SCH  PROX_TOP_PRIMARY_SCH PROX_SHOPPING_MALL PROX_SUPERMARKET
 Min.   :0.07711   Min.   :0.07711      Min.   :0.0000     Min.   :0.0000  
 1st Qu.:0.44024   1st Qu.:1.34451      1st Qu.:0.5258     1st Qu.:0.3695  
 Median :0.63505   Median :1.88213      Median :0.9357     Median :0.5687  
 Mean   :0.75471   Mean   :2.27347      Mean   :1.0455     Mean   :0.6141  
 3rd Qu.:0.95104   3rd Qu.:2.90954      3rd Qu.:1.3994     3rd Qu.:0.7862  
 Max.   :3.92899   Max.   :6.74819      Max.   :3.4774     Max.   :2.2441  
 PROX_BUS_STOP       NO_Of_UNITS     FAMILY_FRIENDLY     FREEHOLD     
 Min.   :0.001595   Min.   :  18.0   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:0.098356   1st Qu.: 188.8   1st Qu.:0.0000   1st Qu.:0.0000  
 Median :0.151710   Median : 360.0   Median :0.0000   Median :0.0000  
 Mean   :0.193974   Mean   : 409.2   Mean   :0.4868   Mean   :0.4227  
 3rd Qu.:0.220466   3rd Qu.: 590.0   3rd Qu.:1.0000   3rd Qu.:1.0000  
 Max.   :2.476639   Max.   :1703.0   Max.   :1.0000   Max.   :1.0000  
 LEASEHOLD_99YR            geometry   
 Min.   :0.0000   POINT        :1436  
 1st Qu.:0.0000   epsg:3414    :   0  
 Median :0.0000   +proj=tmer...:   0  
 Mean   :0.4882                       
 3rd Qu.:1.0000                       
 Max.   :1.0000                       

There are many variables so summary is not the best tool to explore them.

EDA using graph

EDA dependent variable

Our dependent variable is SELLING_PRICE, which we will inspect using the plot below.

# 20-bin histogram of the raw selling price.
condo_resale.sf %>%
  ggplot(aes(x = SELLING_PRICE)) +
  geom_histogram(bins = 20, color = "black", fill = "light blue")

Because the distribution of SELLING_PRICE is highly skewed to the right, we apply a log transformation to normalize it.

# Add the natural log of SELLING_PRICE as a new column.
condo_resale.sf <- mutate(
  condo_resale.sf,
  LOG_SELLING_PRICE = log(SELLING_PRICE)
)

EDA independent variable

Let’s plot multiple graphs to view the variables’ distribution.

# Draw a 20-bin histogram of one column of condo_resale.sf.
# `var` is the column name as a string; it is also used as the x-axis label so
# the plot reads the same as one built with a bare column name.
plot_histogram <- function(var) {
  ggplot(data = condo_resale.sf, aes(x = .data[[var]])) +
    geom_histogram(bins = 20, color = "black", fill = "light blue") +
    labs(x = var)
}

# One histogram per independent variable. The plot objects keep the same names
# as before, so any code that refers to them still works.
AREA_SQM <- plot_histogram("AREA_SQM")
AGE <- plot_histogram("AGE")
PROX_CBD <- plot_histogram("PROX_CBD")
PROX_CHILDCARE <- plot_histogram("PROX_CHILDCARE")
PROX_ELDERLYCARE <- plot_histogram("PROX_ELDERLYCARE")
PROX_URA_GROWTH_AREA <- plot_histogram("PROX_URA_GROWTH_AREA")
PROX_HAWKER_MARKET <- plot_histogram("PROX_HAWKER_MARKET")
PROX_KINDERGARTEN <- plot_histogram("PROX_KINDERGARTEN")
PROX_MRT <- plot_histogram("PROX_MRT")
PROX_PARK <- plot_histogram("PROX_PARK")
PROX_PRIMARY_SCH <- plot_histogram("PROX_PRIMARY_SCH")
PROX_TOP_PRIMARY_SCH <- plot_histogram("PROX_TOP_PRIMARY_SCH")

# Arrange the twelve histograms on a 3-column by 4-row grid.
ggarrange(AREA_SQM, AGE, PROX_CBD, PROX_CHILDCARE, PROX_ELDERLYCARE,
          PROX_URA_GROWTH_AREA, PROX_HAWKER_MARKET, PROX_KINDERGARTEN, PROX_MRT,
          PROX_PARK, PROX_PRIMARY_SCH, PROX_TOP_PRIMARY_SCH,
          ncol = 3, nrow = 4)

Drawing Statistical Point Map

# check.and.fix is a global tmap option, not a map layer, so it is set with
# its own call before the map is built instead of being added with `+`.
tmap_options(check.and.fix = TRUE)

# Interactive map: subzone polygons with the condo points on top, coloured by
# selling price in quantile classes.
tmap_mode("view")
tm_shape(mpsz) +
  tm_polygons() +
  tm_shape(condo_resale.sf) +
  tm_dots(col = "SELLING_PRICE",
          alpha = 0.6,
          style = "quantile") +
  tm_view(set.zoom.limits = c(11, 14))
# Switch back to static plotting for the rest of the document.
tmap_mode("plot")

Hedonic Pricing Modelling in R

Simple Linear Regression Method

To build a simple linear regression model with SELLING_PRICE as the dependent variable and AREA_SQM as the independent variable, we use the lm() function. It creates an lm object (lm stands for linear model), which is assigned to condo.slr.

# Simple linear regression of selling price on floor area.
condo.slr <- lm(
  formula = SELLING_PRICE ~ AREA_SQM,
  data = condo_resale.sf
)

There are many ways to examine lm objects. Here we use summary but clicking on the object’s name in the Environment tab will also do.

# Coefficient table and fit statistics of the simple model.
condo.slr %>% summary()

Call:
lm(formula = SELLING_PRICE ~ AREA_SQM, data = condo_resale.sf)

Residuals:
     Min       1Q   Median       3Q      Max 
-3695815  -391764   -87517   258900 13503875 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -258121.1    63517.2  -4.064 5.09e-05 ***
AREA_SQM      14719.0      428.1  34.381  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 942700 on 1434 degrees of freedom
Multiple R-squared:  0.4518,    Adjusted R-squared:  0.4515 
F-statistic:  1182 on 1 and 1434 DF,  p-value: < 2.2e-16

There are three things to notice:

  • The R Square value is only 0.4518, meaning about 45% of the variation in selling price is explained by the floor area (AREA_SQM).

  • Since this model only contains one independent variable, we do not need to refer to the Adjusted R Square.

  • The p-values of the intercept (B0) and the coefficient (B1) are both less than 0.05, so we can reject the null hypotheses that B0 = 0 and B1 = 0 at the 95% confidence level.

We can also visualize the regression line using the lm object and ggplot() function. Remember to put method = lm in geom_smooth().

# Scatter plot of price against floor area with the fitted lm line on top.
condo_resale.sf %>%
  ggplot(aes(x = AREA_SQM, y = SELLING_PRICE)) +
  geom_point() +
  geom_smooth(method = "lm")

Multiple Linear Regression Method

Checking for multicollinearity

When doing multiple linear regression, we need to check for multicollinearity between the independent variables and take care of the high correlation between them, if any.

# Correlation matrix of the 19 candidate predictors. The columns are selected
# by name (AREA_SQM through LEASEHOLD_99YR) rather than by position, so the
# selection does not silently shift if columns are added or reordered.
# dplyr::select is namespaced to avoid masking by other attached packages.
condo_resale.sf %>%
  st_drop_geometry() %>%
  dplyr::select(AREA_SQM:LEASEHOLD_99YR) %>%
  cor() %>%
  corrplot(diag = FALSE, order = "AOE",
           tl.pos = "td", tl.cex = 0.5, method = "number", type = "upper")

Matrix reordering is very important for mining the hidden structure and patterns in the matrix. There are four methods in corrplot (parameter order), named "AOE", "FPC", "hclust", "alphabet". In the code chunk above, AOE order is used. It orders the variables by using the angular order of the eigenvectors method suggested by Michael Friendly.

From the plot above, we can see that LEASEHOLD_99YR and FREEHOLD are highly correlated. We will not include LEASEHOLD_99YR in the modeling.

Building a hedonic pricing model using multiple linear regression method

# Multiple linear regression of selling price on 18 predictors
# (LEASEHOLD_99YR is left out of the formula).
condo.mlr <- lm(
  formula = SELLING_PRICE ~
    AREA_SQM +
    AGE +
    PROX_CBD +
    PROX_CHILDCARE +
    PROX_ELDERLYCARE +
    PROX_URA_GROWTH_AREA +
    PROX_HAWKER_MARKET +
    PROX_KINDERGARTEN +
    PROX_MRT +
    PROX_PARK +
    PROX_PRIMARY_SCH +
    PROX_TOP_PRIMARY_SCH +
    PROX_SHOPPING_MALL +
    PROX_SUPERMARKET +
    PROX_BUS_STOP +
    NO_Of_UNITS +
    FAMILY_FRIENDLY +
    FREEHOLD,
  data = condo_resale.sf
)
summary(condo.mlr)

Call:
lm(formula = SELLING_PRICE ~ AREA_SQM + AGE + PROX_CBD + PROX_CHILDCARE + 
    PROX_ELDERLYCARE + PROX_URA_GROWTH_AREA + PROX_HAWKER_MARKET + 
    PROX_KINDERGARTEN + PROX_MRT + PROX_PARK + PROX_PRIMARY_SCH + 
    PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + PROX_SUPERMARKET + 
    PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + FREEHOLD, 
    data = condo_resale.sf)

Residuals:
     Min       1Q   Median       3Q      Max 
-3475964  -293923   -23069   241043 12260381 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           481728.40  121441.01   3.967 7.65e-05 ***
AREA_SQM               12708.32     369.59  34.385  < 2e-16 ***
AGE                   -24440.82    2763.16  -8.845  < 2e-16 ***
PROX_CBD              -78669.78    6768.97 -11.622  < 2e-16 ***
PROX_CHILDCARE       -351617.91  109467.25  -3.212  0.00135 ** 
PROX_ELDERLYCARE      171029.42   42110.51   4.061 5.14e-05 ***
PROX_URA_GROWTH_AREA   38474.53   12523.57   3.072  0.00217 ** 
PROX_HAWKER_MARKET     23746.10   29299.76   0.810  0.41782    
PROX_KINDERGARTEN     147468.99   82668.87   1.784  0.07466 .  
PROX_MRT             -314599.68   57947.44  -5.429 6.66e-08 ***
PROX_PARK             563280.50   66551.68   8.464  < 2e-16 ***
PROX_PRIMARY_SCH      180186.08   65237.95   2.762  0.00582 ** 
PROX_TOP_PRIMARY_SCH    2280.04   20410.43   0.112  0.91107    
PROX_SHOPPING_MALL   -206604.06   42840.60  -4.823 1.57e-06 ***
PROX_SUPERMARKET      -44991.80   77082.64  -0.584  0.55953    
PROX_BUS_STOP         683121.35  138353.28   4.938 8.85e-07 ***
NO_Of_UNITS             -231.18      89.03  -2.597  0.00951 ** 
FAMILY_FRIENDLY       140340.77   47020.55   2.985  0.00289 ** 
FREEHOLD              359913.01   49220.22   7.312 4.38e-13 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 755800 on 1417 degrees of freedom
Multiple R-squared:  0.6518,    Adjusted R-squared:  0.6474 
F-statistic: 147.4 on 18 and 1417 DF,  p-value: < 2.2e-16

Preparing Publication Quality Table: olsrr method

The report above shows us the metrics we will use to analyze the quality of the model later, which are R Square, Adjusted R Square, p-values. However, it contains a lot of text and the layout is not very well organized. To address that, we will use the olsrr package.

Note: I don’t calculate condo.mlr1 because I want to explore the model with more independent variables.

# Formatted regression report from olsrr.
condo.mlr %>% ols_regress()
                             Model Summary                               
------------------------------------------------------------------------
R                       0.807       RMSE                     755816.386 
R-Squared               0.652       Coef. Var                    43.160 
Adj. R-Squared          0.647       MSE                571258408962.149 
Pred R-Squared          0.637       MAE                      413425.809 
------------------------------------------------------------------------
 RMSE: Root Mean Square Error 
 MSE: Mean Square Error 
 MAE: Mean Absolute Error 

                                     ANOVA                                       
--------------------------------------------------------------------------------
                    Sum of                                                      
                   Squares          DF         Mean Square       F         Sig. 
--------------------------------------------------------------------------------
Regression    1.515174e+15          18        8.417631e+13    147.352    0.0000 
Residual      8.094732e+14        1417    571258408962.149                      
Total         2.324647e+15        1435                                          
--------------------------------------------------------------------------------

                                               Parameter Estimates                                                
-----------------------------------------------------------------------------------------------------------------
               model           Beta    Std. Error    Std. Beta       t        Sig           lower          upper 
-----------------------------------------------------------------------------------------------------------------
         (Intercept)     481728.405    121441.014                   3.967    0.000     243504.909     719951.900 
            AREA_SQM      12708.324       369.590        0.580     34.385    0.000      11983.322      13433.326 
                 AGE     -24440.816      2763.164       -0.165     -8.845    0.000     -29861.148     -19020.484 
            PROX_CBD     -78669.779      6768.972       -0.268    -11.622    0.000     -91948.061     -65391.496 
      PROX_CHILDCARE    -351617.910    109467.252       -0.092     -3.212    0.001    -566353.201    -136882.619 
    PROX_ELDERLYCARE     171029.418     42110.506        0.083      4.061    0.000      88423.783     253635.053 
PROX_URA_GROWTH_AREA      38474.534     12523.567        0.059      3.072    0.002      13907.809      63041.258 
  PROX_HAWKER_MARKET      23746.098     29299.755        0.019      0.810    0.418     -33729.461      81221.657 
   PROX_KINDERGARTEN     147468.986     82668.868        0.031      1.784    0.075     -14697.534     309635.506 
            PROX_MRT    -314599.679     57947.441       -0.120     -5.429    0.000    -428271.672    -200927.687 
           PROX_PARK     563280.499     66551.675        0.148      8.464    0.000     432730.102     693830.897 
    PROX_PRIMARY_SCH     180186.083     65237.948        0.070      2.762    0.006      52212.744     308159.421 
PROX_TOP_PRIMARY_SCH       2280.036     20410.435        0.002      0.112    0.911     -37757.880      42317.951 
  PROX_SHOPPING_MALL    -206604.057     42840.595       -0.108     -4.823    0.000    -290641.863    -122566.252 
    PROX_SUPERMARKET     -44991.803     77082.635       -0.012     -0.584    0.560    -196200.149     106216.542 
       PROX_BUS_STOP     683121.347    138353.278        0.134      4.938    0.000     411722.087     954520.608 
         NO_Of_UNITS       -231.180        89.033       -0.050     -2.597    0.010       -405.830        -56.530 
     FAMILY_FRIENDLY     140340.770     47020.551        0.055      2.985    0.003      48103.399     232578.141 
            FREEHOLD     359913.008     49220.224        0.140      7.312    0.000     263360.671     456465.345 
-----------------------------------------------------------------------------------------------------------------

Preparing Publication Quality Table: gtsummary method

Besides olsrr, we can also use the gtsummary package.

# gtsummary coefficient table, including the intercept row.
condo.mlr %>% tbl_regression(intercept = TRUE)
Characteristic Beta 95% CI1 p-value
(Intercept) 481,728 243,505, 719,952 <0.001
AREA_SQM 12,708 11,983, 13,433 <0.001
AGE -24,441 -29,861, -19,020 <0.001
PROX_CBD -78,670 -91,948, -65,391 <0.001
PROX_CHILDCARE -351,618 -566,353, -136,883 0.001
PROX_ELDERLYCARE 171,029 88,424, 253,635 <0.001
PROX_URA_GROWTH_AREA 38,475 13,908, 63,041 0.002
PROX_HAWKER_MARKET 23,746 -33,729, 81,222 0.4
PROX_KINDERGARTEN 147,469 -14,698, 309,636 0.075
PROX_MRT -314,600 -428,272, -200,928 <0.001
PROX_PARK 563,280 432,730, 693,831 <0.001
PROX_PRIMARY_SCH 180,186 52,213, 308,159 0.006
PROX_TOP_PRIMARY_SCH 2,280 -37,758, 42,318 >0.9
PROX_SHOPPING_MALL -206,604 -290,642, -122,566 <0.001
PROX_SUPERMARKET -44,992 -196,200, 106,217 0.6
PROX_BUS_STOP 683,121 411,722, 954,521 <0.001
NO_Of_UNITS -231 -406, -57 0.010
FAMILY_FRIENDLY 140,341 48,103, 232,578 0.003
FREEHOLD 359,913 263,361, 456,465 <0.001
1 CI = Confidence Interval

With gtsummary package, model statistics can be included in the report by either appending them to the report table by using add_glance_table() or adding as a table source note by using add_glance_source_note() as shown in the code chunk below.

# Same coefficient table, with model-level statistics appended as a source
# note; sigma is labelled with the Greek letter.
condo.mlr %>%
  tbl_regression(intercept = TRUE) %>%
  add_glance_source_note(
    label = list(sigma ~ "\U03C3"),
    include = c(r.squared, adj.r.squared, AIC, statistic, p.value, sigma)
  )
Characteristic Beta 95% CI1 p-value
(Intercept) 481,728 243,505, 719,952 <0.001
AREA_SQM 12,708 11,983, 13,433 <0.001
AGE -24,441 -29,861, -19,020 <0.001
PROX_CBD -78,670 -91,948, -65,391 <0.001
PROX_CHILDCARE -351,618 -566,353, -136,883 0.001
PROX_ELDERLYCARE 171,029 88,424, 253,635 <0.001
PROX_URA_GROWTH_AREA 38,475 13,908, 63,041 0.002
PROX_HAWKER_MARKET 23,746 -33,729, 81,222 0.4
PROX_KINDERGARTEN 147,469 -14,698, 309,636 0.075
PROX_MRT -314,600 -428,272, -200,928 <0.001
PROX_PARK 563,280 432,730, 693,831 <0.001
PROX_PRIMARY_SCH 180,186 52,213, 308,159 0.006
PROX_TOP_PRIMARY_SCH 2,280 -37,758, 42,318 >0.9
PROX_SHOPPING_MALL -206,604 -290,642, -122,566 <0.001
PROX_SUPERMARKET -44,992 -196,200, 106,217 0.6
PROX_BUS_STOP 683,121 411,722, 954,521 <0.001
NO_Of_UNITS -231 -406, -57 0.010
FAMILY_FRIENDLY 140,341 48,103, 232,578 0.003
FREEHOLD 359,913 263,361, 456,465 <0.001
R² = 0.652; Adjusted R² = 0.647; AIC = 42,970; Statistic = 147; p-value = <0.001; σ = 755,816
1 CI = Confidence Interval

Checking for multicollinearity

olsrr can do more than just displaying tables. It can also be used to build regression models. Below is the list of what the package can do.

  • comprehensive regression output

  • residual diagnostics

  • measures of influence

  • heteroskedasticity tests

  • collinearity diagnostics

  • model fit assessment

  • variable contribution assessment

  • variable selection procedures

ols_vif_tol() is used to check for multicollinearity.

# Tolerance and variance inflation factor for each predictor.
condo.mlr %>% ols_vif_tol()
              Variables Tolerance      VIF
1              AREA_SQM 0.8625928 1.159296
2                   AGE 0.7026139 1.423257
3              PROX_CBD 0.4605774 2.171188
4        PROX_CHILDCARE 0.2981029 3.354546
5      PROX_ELDERLYCARE 0.5922259 1.688545
6  PROX_URA_GROWTH_AREA 0.6614127 1.511915
7    PROX_HAWKER_MARKET 0.4373889 2.286295
8     PROX_KINDERGARTEN 0.8370845 1.194622
9              PROX_MRT 0.5049530 1.980382
10            PROX_PARK 0.8018396 1.247132
11     PROX_PRIMARY_SCH 0.3855782 2.593508
12 PROX_TOP_PRIMARY_SCH 0.4968645 2.012621
13   PROX_SHOPPING_MALL 0.4906426 2.038144
14     PROX_SUPERMARKET 0.6152063 1.625471
15        PROX_BUS_STOP 0.3320516 3.011580
16          NO_Of_UNITS 0.6731165 1.485627
17      FAMILY_FRIENDLY 0.7202230 1.388459
18             FREEHOLD 0.6729095 1.486084

No variable has a VIF greater than 10, so we can conclude that there is no sign of multicollinearity among the independent variables.

Testing for non-linearity

When building a regression model, we also need to test whether the relationship between the dependent and independent variables is linear. We can do that using ols_plot_resid_fit().

# Residuals plotted against fitted values.
condo.mlr %>% ols_plot_resid_fit()

Most of the residuals are scattered around the 0 line, so we can conclude that the relationship in concern is linear.

Testing for normality assumption

# Histogram of the model residuals.
condo.mlr %>% ols_plot_resid_hist()

The histogram suggests that the residuals roughly resemble a normal distribution. A formal statistical test of normality can be performed using ols_test_normality().

# Formal normality tests on the model residuals.
condo.mlr %>% ols_test_normality()
-----------------------------------------------
       Test             Statistic       pvalue  
-----------------------------------------------
Shapiro-Wilk              0.6836         0.0000 
Kolmogorov-Smirnov        0.1388         0.0000 
Cramer-von Mises         120.5692        0.0000 
Anderson-Darling         68.3268         0.0000 
-----------------------------------------------

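For all the tests listed above, the p-value is less than 0.05. Therefore, we reject the null hypothesis that the residuals are normally distributed: despite the visual impression from the histogram, there is statistical evidence at the 95% confidence level that the residuals are not normally distributed.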

Testing for Spatial Autocorrelation

The hedonic model we are trying to build uses geographically referenced attributes, hence it is also important for us to visualize the residuals of the hedonic pricing model.

In order to perform spatial autocorrelation test, we need to convert condo_resale.sf from sf data frame into a SpatialPointsDataFrame.

# Keep the residuals as a standalone data frame.
mlr.output <- as.data.frame(condo.mlr$residuals)

# Bind the residuals onto the sf table. cbind() names the new column
# `condo.mlr.residuals`, which is renamed to MLR_RES.
condo_resale.res.sf <- rename(
  cbind(condo_resale.sf, condo.mlr$residuals),
  MLR_RES = condo.mlr.residuals
)

# sp-class copy of the same data.
condo_resale.sp <- as_Spatial(condo_resale.res.sf)

Now we can create an interactive map.

# check.and.fix is a global tmap option, not a map layer, so it is set with
# its own call before the map is built instead of being placed between
# tm_shape() and tm_polygons() with `+`.
tmap_options(check.and.fix = TRUE)

# Interactive map: semi-transparent subzone polygons with the condo points on
# top, coloured by regression residual in quantile classes.
tmap_mode("view")
tm_shape(mpsz) +
  tm_polygons(alpha = 0.4) +
  tm_shape(condo_resale.res.sf) +
  tm_dots(col = "MLR_RES",
          alpha = 0.6,
          style = "quantile") +
  tm_view(set.zoom.limits = c(11, 14))
# Switch back to static plotting for the rest of the document.
tmap_mode("plot")

We can see that there are some spots where high residual values tend to cluster together. This is a sign of spatial autocorrelation. We will use Moran’s I test to confirm. The test is based on a distance-based spatial weight matrix in which condos within 1,500 m of each other are treated as neighbours, with row-standardised weights.

# Neighbour list: every pair of points between 0 and 1500 map units apart
# (planar distances, longlat = FALSE).
nb <- dnearneigh(
  x = coordinates(condo_resale.sp),
  d1 = 0,
  d2 = 1500,
  longlat = FALSE
)
# Row-standardised ("W") spatial weights.
nb_lw <- nb2listw(neighbours = nb, style = "W")
# Moran's I test on the residuals of the multiple regression.
lm.morantest(model = condo.mlr, listw = nb_lw)

    Global Moran I for regression residuals

data:  
model: lm(formula = SELLING_PRICE ~ AREA_SQM + AGE + PROX_CBD +
PROX_CHILDCARE + PROX_ELDERLYCARE + PROX_URA_GROWTH_AREA +
PROX_HAWKER_MARKET + PROX_KINDERGARTEN + PROX_MRT + PROX_PARK +
PROX_PRIMARY_SCH + PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL +
PROX_SUPERMARKET + PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY +
FREEHOLD, data = condo_resale.sf)
weights: nb_lw

Moran I statistic standard deviate = 24.673, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Observed Moran I      Expectation         Variance 
    1.392527e-01    -7.134319e-03     3.520278e-05 

The p-value of the test is less than 0.05, so we reject the null hypothesis that the residuals are randomly distributed. In addition, the observed Moran's I (0.139) is greater than 0, meaning the residuals show a clustered pattern.

Building Hedonic Pricing Models using GWmodel

Building Fixed Bandwidth GWR Model

We will use bw.gwr() to determine the optimal fixed bandwidth to use in the model. adaptive is set to FALSE to select a fixed bandwidth. The cross-validation (CV) approach is used, so approach = "CV".

# Search for the fixed (adaptive = FALSE) bandwidth using the cross-validation
# approach with a Gaussian kernel and planar distances.
bw.fixed <- bw.gwr(
  formula = SELLING_PRICE ~
    AREA_SQM +
    AGE +
    PROX_CBD +
    PROX_CHILDCARE +
    PROX_ELDERLYCARE +
    PROX_URA_GROWTH_AREA +
    PROX_HAWKER_MARKET +
    PROX_KINDERGARTEN +
    PROX_MRT +
    PROX_PARK +
    PROX_PRIMARY_SCH +
    PROX_TOP_PRIMARY_SCH +
    PROX_SHOPPING_MALL +
    PROX_SUPERMARKET +
    PROX_BUS_STOP +
    NO_Of_UNITS +
    FAMILY_FRIENDLY +
    FREEHOLD,
  data = condo_resale.sp,
  approach = "CV",
  kernel = "gaussian",
  adaptive = FALSE,
  longlat = FALSE
)
Fixed bandwidth: 17660.96 CV score: 8.235467e+14 
Fixed bandwidth: 10917.26 CV score: 7.902384e+14 
Fixed bandwidth: 6749.419 CV score: 7.152539e+14 
Fixed bandwidth: 4173.553 CV score: 6.182116e+14 
Fixed bandwidth: 2581.58 CV score: 5.257275e+14 
Fixed bandwidth: 1597.687 CV score: 4.748442e+14 
Fixed bandwidth: 989.6077 CV score: 5.095011e+14 
Fixed bandwidth: 1973.501 CV score: 4.85724e+14 
Fixed bandwidth: 1365.421 CV score: 4.766341e+14 
Fixed bandwidth: 1741.235 CV score: 4.772231e+14 
Fixed bandwidth: 1508.969 CV score: 4.745788e+14 
Fixed bandwidth: 1454.139 CV score: 4.749631e+14 
Fixed bandwidth: 1542.857 CV score: 4.7456e+14 
Fixed bandwidth: 1563.8 CV score: 4.746245e+14 
Fixed bandwidth: 1529.913 CV score: 4.745487e+14 
Fixed bandwidth: 1521.913 CV score: 4.745531e+14 
Fixed bandwidth: 1534.857 CV score: 4.745504e+14 
Fixed bandwidth: 1526.857 CV score: 4.745494e+14 
Fixed bandwidth: 1531.801 CV score: 4.74549e+14 
Fixed bandwidth: 1528.746 CV score: 4.745488e+14 
Fixed bandwidth: 1530.634 CV score: 4.745488e+14 
Fixed bandwidth: 1529.467 CV score: 4.745487e+14 
Fixed bandwidth: 1530.188 CV score: 4.745487e+14 
Fixed bandwidth: 1529.743 CV score: 4.745487e+14 
Fixed bandwidth: 1530.018 CV score: 4.745487e+14 
Fixed bandwidth: 1529.848 CV score: 4.745487e+14 
Fixed bandwidth: 1529.808 CV score: 4.745487e+14 
Fixed bandwidth: 1529.873 CV score: 4.745487e+14 
Fixed bandwidth: 1529.888 CV score: 4.745487e+14 
Fixed bandwidth: 1529.863 CV score: 4.745487e+14 
Fixed bandwidth: 1529.879 CV score: 4.745487e+14 
Fixed bandwidth: 1529.869 CV score: 4.745487e+14 
Fixed bandwidth: 1529.867 CV score: 4.745487e+14 
Fixed bandwidth: 1529.87 CV score: 4.745487e+14 
Fixed bandwidth: 1529.871 CV score: 4.745487e+14 
Fixed bandwidth: 1529.87 CV score: 4.745487e+14 
Fixed bandwidth: 1529.871 CV score: 4.745487e+14 
Fixed bandwidth: 1529.87 CV score: 4.745487e+14 
Fixed bandwidth: 1529.871 CV score: 4.745487e+14 
Fixed bandwidth: 1529.87 CV score: 4.745487e+14 
Fixed bandwidth: 1529.87 CV score: 4.745487e+14 
Fixed bandwidth: 1529.87 CV score: 4.745487e+14 

GWmodel method - fixed bandwidth

# Calibrate the GWR model with the bandwidth stored in bw.fixed, a Gaussian
# kernel and planar distances, then print the fitted model.
gwr.fixed <- gwr.basic(
  formula = SELLING_PRICE ~
    AREA_SQM +
    AGE +
    PROX_CBD +
    PROX_CHILDCARE +
    PROX_ELDERLYCARE +
    PROX_URA_GROWTH_AREA +
    PROX_HAWKER_MARKET +
    PROX_KINDERGARTEN +
    PROX_MRT +
    PROX_PARK +
    PROX_PRIMARY_SCH +
    PROX_TOP_PRIMARY_SCH +
    PROX_SHOPPING_MALL +
    PROX_SUPERMARKET +
    PROX_BUS_STOP +
    NO_Of_UNITS +
    FAMILY_FRIENDLY +
    FREEHOLD,
  data = condo_resale.sp,
  bw = bw.fixed,
  kernel = "gaussian",
  longlat = FALSE
)
gwr.fixed
   ***********************************************************************
   *                       Package   GWmodel                             *
   ***********************************************************************
   Program starts at: 2022-12-10 22:46:24 
   Call:
   gwr.basic(formula = SELLING_PRICE ~ AREA_SQM + AGE + PROX_CBD + 
    PROX_CHILDCARE + PROX_ELDERLYCARE + PROX_URA_GROWTH_AREA + 
    PROX_HAWKER_MARKET + PROX_KINDERGARTEN + PROX_MRT + PROX_PARK + 
    PROX_PRIMARY_SCH + PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + 
    PROX_SUPERMARKET + PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + 
    FREEHOLD, data = condo_resale.sp, bw = bw.fixed, kernel = "gaussian", 
    longlat = FALSE)

   Dependent (y) variable:  SELLING_PRICE
   Independent variables:  AREA_SQM AGE PROX_CBD PROX_CHILDCARE PROX_ELDERLYCARE PROX_URA_GROWTH_AREA PROX_HAWKER_MARKET PROX_KINDERGARTEN PROX_MRT PROX_PARK PROX_PRIMARY_SCH PROX_TOP_PRIMARY_SCH PROX_SHOPPING_MALL PROX_SUPERMARKET PROX_BUS_STOP NO_Of_UNITS FAMILY_FRIENDLY FREEHOLD
   Number of data points: 1436
   ***********************************************************************
   *                    Results of Global Regression                     *
   ***********************************************************************

   Call:
    lm(formula = formula, data = data)

   Residuals:
     Min       1Q   Median       3Q      Max 
-3475964  -293923   -23069   241043 12260381 

   Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
   (Intercept)           481728.40  121441.01   3.967 7.65e-05 ***
   AREA_SQM               12708.32     369.59  34.385  < 2e-16 ***
   AGE                   -24440.82    2763.16  -8.845  < 2e-16 ***
   PROX_CBD              -78669.78    6768.97 -11.622  < 2e-16 ***
   PROX_CHILDCARE       -351617.91  109467.25  -3.212  0.00135 ** 
   PROX_ELDERLYCARE      171029.42   42110.51   4.061 5.14e-05 ***
   PROX_URA_GROWTH_AREA   38474.53   12523.57   3.072  0.00217 ** 
   PROX_HAWKER_MARKET     23746.10   29299.76   0.810  0.41782    
   PROX_KINDERGARTEN     147468.99   82668.87   1.784  0.07466 .  
   PROX_MRT             -314599.68   57947.44  -5.429 6.66e-08 ***
   PROX_PARK             563280.50   66551.68   8.464  < 2e-16 ***
   PROX_PRIMARY_SCH      180186.08   65237.95   2.762  0.00582 ** 
   PROX_TOP_PRIMARY_SCH    2280.04   20410.43   0.112  0.91107    
   PROX_SHOPPING_MALL   -206604.06   42840.60  -4.823 1.57e-06 ***
   PROX_SUPERMARKET      -44991.80   77082.64  -0.584  0.55953    
   PROX_BUS_STOP         683121.35  138353.28   4.938 8.85e-07 ***
   NO_Of_UNITS             -231.18      89.03  -2.597  0.00951 ** 
   FAMILY_FRIENDLY       140340.77   47020.55   2.985  0.00289 ** 
   FREEHOLD              359913.01   49220.22   7.312 4.38e-13 ***

   ---Significance stars
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
   Residual standard error: 755800 on 1417 degrees of freedom
   Multiple R-squared: 0.6518
   Adjusted R-squared: 0.6474 
   F-statistic: 147.4 on 18 and 1417 DF,  p-value: < 2.2e-16 
   ***Extra Diagnostic information
   Residual sum of squares: 8.094732e+14
   Sigma(hat): 751322.9
   AIC:  42970.18
   AICc:  42970.77
   BIC:  41784.96
   ***********************************************************************
   *          Results of Geographically Weighted Regression              *
   ***********************************************************************

   *********************Model calibration information*********************
   Kernel function: gaussian 
   Fixed bandwidth: 1529.87 
   Regression points: the same locations as observations are used.
   Distance metric: Euclidean distance metric is used.

   ****************Summary of GWR coefficient estimates:******************
                               Min.     1st Qu.      Median     3rd Qu.
   Intercept            -1.7294e+06  5.0686e+05  1.2459e+06  2.0286e+06
   AREA_SQM              2.7257e+03  5.4383e+03  7.8679e+03  1.2293e+04
   AGE                  -8.0810e+04 -2.5075e+04 -1.2338e+04 -5.6014e+03
   PROX_CBD             -1.4408e+06 -2.7205e+05 -1.7043e+05 -6.6519e+04
   PROX_CHILDCARE       -3.2769e+06 -2.2120e+05 -5.9600e+04  1.0788e+05
   PROX_ELDERLYCARE     -1.6966e+06 -3.5677e+04  8.9834e+04  2.0528e+05
   PROX_URA_GROWTH_AREA -7.0723e+05  9.2127e+03  7.3288e+04  1.8958e+05
   PROX_HAWKER_MARKET   -6.4011e+05 -5.6009e+04  8.2983e+04  4.3080e+05
   PROX_KINDERGARTEN    -1.8548e+06 -3.5646e+05 -1.6452e+05  1.3895e+05
   PROX_MRT             -2.7453e+06 -6.3607e+05 -2.4833e+05 -7.1555e+04
   PROX_PARK            -1.0939e+06 -1.5591e+05  1.0240e+04  3.2499e+05
   PROX_PRIMARY_SCH     -6.2015e+05 -1.8895e+05  1.4868e+03  3.8470e+05
   PROX_TOP_PRIMARY_SCH -7.6447e+05 -1.1127e+05 -1.7162e+04  5.2347e+04
   PROX_SHOPPING_MALL   -9.5378e+05 -1.6176e+05 -2.1287e+04  7.0973e+04
   PROX_SUPERMARKET     -7.2192e+05 -1.1000e+05 -5.9072e+03  1.5699e+05
   PROX_BUS_STOP        -6.2152e+05  4.5824e+04  4.3923e+05  1.5814e+06
   NO_Of_UNITS          -1.6083e+03 -2.9384e+02 -1.0619e+02  5.5081e+00
   FAMILY_FRIENDLY      -1.4636e+06 -4.4834e+04  1.4704e+04  1.7087e+05
   FREEHOLD             -1.4458e+05  8.0776e+04  1.8066e+05  3.4671e+05
                             Max.
   Intercept            9517178.8
   AREA_SQM               19022.4
   AGE                    34460.5
   PROX_CBD              535635.5
   PROX_CHILDCARE       1209089.3
   PROX_ELDERLYCARE     2236812.0
   PROX_URA_GROWTH_AREA 1511628.7
   PROX_HAWKER_MARKET   2415026.9
   PROX_KINDERGARTEN     782179.4
   PROX_MRT              734567.9
   PROX_PARK            1074304.2
   PROX_PRIMARY_SCH     1472504.3
   PROX_TOP_PRIMARY_SCH  883983.8
   PROX_SHOPPING_MALL    595084.1
   PROX_SUPERMARKET     1647456.7
   PROX_BUS_STOP        5131416.8
   NO_Of_UNITS             1483.4
   FAMILY_FRIENDLY      1278255.4
   FREEHOLD              885791.0
   ************************Diagnostic information*************************
   Number of data points: 1436 
   Effective number of parameters (2trace(S) - trace(S'S)): 310.8448 
   Effective degrees of freedom (n-2trace(S) + trace(S'S)): 1125.155 
   AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 42237.55 
   AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 41869.48 
   BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 42030.5 
   Residual sum of squares: 3.238745e+14 
   R-square value:  0.860678 
   Adjusted R-square value:  0.8221535 

   ***********************************************************************
   Program stops at: 2022-12-10 22:46:25 

The AICc of the fixed bandwidth geographically weighted regression is 42237.55, which is lower than the global regression model's AICc of 42970.77.

Building Adaptive Bandwidth GWR Model

Computing the adaptive bandwidth

The steps here are similar to those for the fixed bandwidth GWR, except that the adaptive argument is set to TRUE.

# Search for the adaptive bandwidth (a number of nearest neighbours) that
# minimises the cross-validation score for the hedonic pricing formula.
bw.adaptive <- bw.gwr(
  formula = SELLING_PRICE ~ AREA_SQM + AGE +
    PROX_CBD + PROX_CHILDCARE + PROX_ELDERLYCARE +
    PROX_URA_GROWTH_AREA + PROX_HAWKER_MARKET + PROX_KINDERGARTEN +
    PROX_MRT + PROX_PARK + PROX_PRIMARY_SCH +
    PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + PROX_SUPERMARKET +
    PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + FREEHOLD,
  data = condo_resale.sp,
  approach = "CV",
  kernel = "gaussian",
  adaptive = TRUE,   # bandwidth is a neighbour count, not a fixed distance
  longlat = FALSE    # Euclidean distances on projected coordinates
)
Adaptive bandwidth: 895 CV score: 7.892714e+14 
Adaptive bandwidth: 561 CV score: 7.538184e+14 
Adaptive bandwidth: 354 CV score: 6.768593e+14 
Adaptive bandwidth: 226 CV score: 5.979758e+14 
Adaptive bandwidth: 147 CV score: 5.499621e+14 
Adaptive bandwidth: 98 CV score: 5.282287e+14 
Adaptive bandwidth: 68 CV score: 5.033227e+14 
Adaptive bandwidth: 49 CV score: 4.744074e+14 
Adaptive bandwidth: 37 CV score: 4.608087e+14 
Adaptive bandwidth: 30 CV score: 4.452109e+14 
Adaptive bandwidth: 25 CV score: 4.584895e+14 
Adaptive bandwidth: 32 CV score: 4.506915e+14 
Adaptive bandwidth: 27 CV score: 4.594263e+14 
Adaptive bandwidth: 30 CV score: 4.452109e+14 

The result shows that 30 is the recommended number of data points (nearest neighbours) to be used.

Constructing the adaptive bandwidth gwr model

# Calibrate the GWR model with the adaptive bandwidth found above, using the
# same formula, kernel and distance settings as the bandwidth search.
gwr.adaptive <- gwr.basic(
  formula = SELLING_PRICE ~ AREA_SQM + AGE +
    PROX_CBD + PROX_CHILDCARE + PROX_ELDERLYCARE +
    PROX_URA_GROWTH_AREA + PROX_HAWKER_MARKET + PROX_KINDERGARTEN +
    PROX_MRT + PROX_PARK + PROX_PRIMARY_SCH +
    PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + PROX_SUPERMARKET +
    PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + FREEHOLD,
  data = condo_resale.sp,
  bw = bw.adaptive,
  kernel = "gaussian",
  adaptive = TRUE,
  longlat = FALSE
)

# Print the global regression and GWR summaries.
gwr.adaptive
   ***********************************************************************
   *                       Package   GWmodel                             *
   ***********************************************************************
   Program starts at: 2022-12-10 22:46:34 
   Call:
   gwr.basic(formula = SELLING_PRICE ~ AREA_SQM + AGE + PROX_CBD + 
    PROX_CHILDCARE + PROX_ELDERLYCARE + PROX_URA_GROWTH_AREA + 
    PROX_HAWKER_MARKET + PROX_KINDERGARTEN + PROX_MRT + PROX_PARK + 
    PROX_PRIMARY_SCH + PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + 
    PROX_SUPERMARKET + PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + 
    FREEHOLD, data = condo_resale.sp, bw = bw.adaptive, kernel = "gaussian", 
    adaptive = TRUE, longlat = FALSE)

   Dependent (y) variable:  SELLING_PRICE
   Independent variables:  AREA_SQM AGE PROX_CBD PROX_CHILDCARE PROX_ELDERLYCARE PROX_URA_GROWTH_AREA PROX_HAWKER_MARKET PROX_KINDERGARTEN PROX_MRT PROX_PARK PROX_PRIMARY_SCH PROX_TOP_PRIMARY_SCH PROX_SHOPPING_MALL PROX_SUPERMARKET PROX_BUS_STOP NO_Of_UNITS FAMILY_FRIENDLY FREEHOLD
   Number of data points: 1436
   ***********************************************************************
   *                    Results of Global Regression                     *
   ***********************************************************************

   Call:
    lm(formula = formula, data = data)

   Residuals:
     Min       1Q   Median       3Q      Max 
-3475964  -293923   -23069   241043 12260381 

   Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
   (Intercept)           481728.40  121441.01   3.967 7.65e-05 ***
   AREA_SQM               12708.32     369.59  34.385  < 2e-16 ***
   AGE                   -24440.82    2763.16  -8.845  < 2e-16 ***
   PROX_CBD              -78669.78    6768.97 -11.622  < 2e-16 ***
   PROX_CHILDCARE       -351617.91  109467.25  -3.212  0.00135 ** 
   PROX_ELDERLYCARE      171029.42   42110.51   4.061 5.14e-05 ***
   PROX_URA_GROWTH_AREA   38474.53   12523.57   3.072  0.00217 ** 
   PROX_HAWKER_MARKET     23746.10   29299.76   0.810  0.41782    
   PROX_KINDERGARTEN     147468.99   82668.87   1.784  0.07466 .  
   PROX_MRT             -314599.68   57947.44  -5.429 6.66e-08 ***
   PROX_PARK             563280.50   66551.68   8.464  < 2e-16 ***
   PROX_PRIMARY_SCH      180186.08   65237.95   2.762  0.00582 ** 
   PROX_TOP_PRIMARY_SCH    2280.04   20410.43   0.112  0.91107    
   PROX_SHOPPING_MALL   -206604.06   42840.60  -4.823 1.57e-06 ***
   PROX_SUPERMARKET      -44991.80   77082.64  -0.584  0.55953    
   PROX_BUS_STOP         683121.35  138353.28   4.938 8.85e-07 ***
   NO_Of_UNITS             -231.18      89.03  -2.597  0.00951 ** 
   FAMILY_FRIENDLY       140340.77   47020.55   2.985  0.00289 ** 
   FREEHOLD              359913.01   49220.22   7.312 4.38e-13 ***

   ---Significance stars
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
   Residual standard error: 755800 on 1417 degrees of freedom
   Multiple R-squared: 0.6518
   Adjusted R-squared: 0.6474 
   F-statistic: 147.4 on 18 and 1417 DF,  p-value: < 2.2e-16 
   ***Extra Diagnostic information
   Residual sum of squares: 8.094732e+14
   Sigma(hat): 751322.9
   AIC:  42970.18
   AICc:  42970.77
   BIC:  41784.96
   ***********************************************************************
   *          Results of Geographically Weighted Regression              *
   ***********************************************************************

   *********************Model calibration information*********************
   Kernel function: gaussian 
   Adaptive bandwidth: 30 (number of nearest neighbours)
   Regression points: the same locations as observations are used.
   Distance metric: Euclidean distance metric is used.

   ****************Summary of GWR coefficient estimates:******************
                               Min.     1st Qu.      Median     3rd Qu.
   Intercept            -1.6028e+08 -5.4344e+05  9.6211e+05  1.8060e+06
   AREA_SQM              3.1331e+03  5.6904e+03  7.7690e+03  1.2440e+04
   AGE                  -9.9093e+04 -3.0879e+04 -1.3909e+04 -6.4137e+03
   PROX_CBD             -2.0014e+07 -3.0226e+05 -1.1138e+05 -3.9884e+04
   PROX_CHILDCARE       -1.1390e+06 -1.7672e+05 -7.6155e+03  4.0347e+05
   PROX_ELDERLYCARE     -3.2078e+06 -1.4195e+05  8.7037e+04  2.7724e+05
   PROX_URA_GROWTH_AREA -2.0846e+07 -1.4344e+04  8.3095e+04  3.9801e+05
   PROX_HAWKER_MARKET   -2.0295e+06 -1.1077e+05  9.3663e+04  5.5331e+05
   PROX_KINDERGARTEN    -1.8346e+06 -3.1986e+05 -2.3020e+04  2.1987e+05
   PROX_MRT             -2.6114e+07 -9.2429e+05 -2.6791e+05 -4.2711e+04
   PROX_PARK            -4.1401e+06 -2.0343e+05  6.3772e+04  4.4498e+05
   PROX_PRIMARY_SCH     -1.3511e+06 -2.2819e+05 -2.6660e+04  4.8742e+05
   PROX_TOP_PRIMARY_SCH -5.6507e+06 -1.9060e+05 -1.5224e+04  7.1469e+04
   PROX_SHOPPING_MALL   -1.3105e+06 -1.4062e+05 -1.6413e+04  1.4532e+05
   PROX_SUPERMARKET     -3.0962e+06 -3.1361e+05 -5.2868e+04  1.0679e+05
   PROX_BUS_STOP        -2.1838e+06 -3.8619e+04  4.3163e+05  1.3596e+06
   NO_Of_UNITS          -2.6839e+03 -2.7312e+02 -7.2995e+01  4.4365e+01
   FAMILY_FRIENDLY      -6.0104e+05 -6.9317e+04  1.3347e+04  2.4512e+05
   FREEHOLD             -2.8864e+05  4.8910e+04  1.7121e+05  3.6600e+05
                              Max.
   Intercept            2.7054e+07
   AREA_SQM             2.3811e+04
   AGE                  1.0352e+04
   PROX_CBD             2.3502e+07
   PROX_CHILDCARE       3.2146e+06
   PROX_ELDERLYCARE     2.3324e+06
   PROX_URA_GROWTH_AREA 2.0730e+07
   PROX_HAWKER_MARKET   5.1540e+06
   PROX_KINDERGARTEN    2.4536e+06
   PROX_MRT             1.2674e+06
   PROX_PARK            3.3645e+06
   PROX_PRIMARY_SCH     2.9412e+06
   PROX_TOP_PRIMARY_SCH 1.4177e+07
   PROX_SHOPPING_MALL   1.5711e+07
   PROX_SUPERMARKET     1.5599e+06
   PROX_BUS_STOP        1.2288e+07
   NO_Of_UNITS          6.6179e+02
   FAMILY_FRIENDLY      2.0840e+06
   FREEHOLD             1.8127e+06
   ************************Diagnostic information*************************
   Number of data points: 1436 
   Effective number of parameters (2trace(S) - trace(S'S)): 402.3843 
   Effective degrees of freedom (n-2trace(S) + trace(S'S)): 1033.616 
   AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 41993.54 
   AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 41449.96 
   BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 42115.63 
   Residual sum of squares: 2.286415e+14 
   R-square value:  0.9016446 
   Adjusted R-square value:  0.863318 

   ***********************************************************************
   Program stops at: 2022-12-10 22:46:36 

The AICc of the adaptive bandwidth geographically weighted regression is 41993.54, which is lower than the global regression's AICc of 42970.77.

Converting SDF into sf data.frame

The adaptive gwr model has the lowest AICc of them all. Therefore, we will visualize it.

To visualise the fields in SDF, we need to first convert it into an sf data.frame.

# Turn the GWR result (SDF) into an sf object in EPSG:3414.
condo_resale.sf.adaptive <- gwr.adaptive$SDF %>%
  st_as_sf() %>%
  st_transform(crs = 3414)

# NOTE(review): this re-projects to the same CRS as the step above and the
# result is not used in the visible part of the file -- confirm whether it is
# still needed.
condo_resale.sf.adaptive.svy21 <- condo_resale.sf.adaptive %>%
  st_transform(crs = 3414)

# Flatten the SDF to a plain data frame, then append its columns (as a
# matrix) to the condo sf table; this replaces the object created above.
gwr.adaptive.output <- as.data.frame(gwr.adaptive$SDF)
condo_resale.sf.adaptive <- cbind(
  condo_resale.res.sf,
  as.matrix(gwr.adaptive.output)
)

# Inspect the combined columns.
glimpse(condo_resale.sf.adaptive)
Rows: 1,436
Columns: 89
$ POSTCODE                <dbl> 118635, 288420, 267833, 258380, 467169, 466472…
$ SELLING_PRICE           <dbl> 3000000, 3880000, 3325000, 4250000, 1400000, 1…
$ AREA_SQM                <dbl> 309, 290, 248, 127, 145, 139, 218, 141, 165, 1…
$ AGE                     <dbl> 30, 32, 33, 7, 28, 22, 24, 24, 27, 31, 17, 22,…
$ PROX_CBD                <dbl> 7.941259, 6.609797, 6.898000, 4.038861, 11.783…
$ PROX_CHILDCARE          <dbl> 0.16597932, 0.28027246, 0.42922669, 0.39473543…
$ PROX_ELDERLYCARE        <dbl> 2.5198118, 1.9333338, 0.5021395, 1.9910316, 1.…
$ PROX_URA_GROWTH_AREA    <dbl> 6.618741, 7.505109, 6.463887, 4.906512, 6.4106…
$ PROX_HAWKER_MARKET      <dbl> 1.76542207, 0.54507614, 0.37789301, 1.68259969…
$ PROX_KINDERGARTEN       <dbl> 0.05835552, 0.61592412, 0.14120309, 0.38200076…
$ PROX_MRT                <dbl> 0.5607188, 0.6584461, 0.3053433, 0.6910183, 0.…
$ PROX_PARK               <dbl> 1.1710446, 0.1992269, 0.2779886, 0.9832843, 0.…
$ PROX_PRIMARY_SCH        <dbl> 1.6340256, 0.9747834, 1.4715016, 1.4546324, 0.…
$ PROX_TOP_PRIMARY_SCH    <dbl> 3.3273195, 0.9747834, 1.4715016, 2.3006394, 0.…
$ PROX_SHOPPING_MALL      <dbl> 2.2102717, 2.9374279, 1.2256850, 0.3525671, 1.…
$ PROX_SUPERMARKET        <dbl> 0.9103958, 0.5900617, 0.4135583, 0.4162219, 0.…
$ PROX_BUS_STOP           <dbl> 0.10336166, 0.28673408, 0.28504777, 0.29872340…
$ NO_Of_UNITS             <dbl> 18, 20, 27, 30, 30, 31, 32, 32, 32, 32, 34, 34…
$ FAMILY_FRIENDLY         <dbl> 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0…
$ FREEHOLD                <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1…
$ LEASEHOLD_99YR          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ LOG_SELLING_PRICE       <dbl> 14.91412, 15.17135, 15.01698, 15.26243, 14.151…
$ MLR_RES                 <dbl> -1442408.78, 389648.30, 221790.82, 1087048.92,…
$ Intercept               <dbl> 2768698.98, 4398265.21, 2605216.84, 84393.17, …
$ AREA_SQM.1              <dbl> 9272.004, 16214.948, 12747.798, 20868.138, 671…
$ AGE.1                   <dbl> -11593.551, -47856.312, -24847.155, -97346.332…
$ PROX_CBD.1              <dbl> -160081.00, -288927.21, -242886.78, -281850.14…
$ PROX_CHILDCARE.1        <dbl> 75553.025, 671015.741, -590017.922, 184972.622…
$ PROX_ELDERLYCARE.1      <dbl> -121956.742, 603472.239, 847742.589, -63177.10…
$ PROX_URA_GROWTH_AREA.1  <dbl> -184786.98, -276295.72, -25208.76, -56506.88, …
$ PROX_HAWKER_MARKET.1    <dbl> 268889.46, 596904.66, 569650.53, 1261821.05, 8…
$ PROX_KINDERGARTEN.1     <dbl> 123461.86, -470509.83, -70054.35, -1115075.03,…
$ PROX_MRT.1              <dbl> -362362.010, -2101706.434, -1160810.281, -2423…
$ PROX_PARK.1             <dbl> -328874.63, 80854.05, 317952.19, 47522.12, 108…
$ PROX_PRIMARY_SCH.1      <dbl> 535160.687, 1330164.374, 977828.079, 1886073.8…
$ PROX_TOP_PRIMARY_SCH.1  <dbl> -204772.23, -944577.89, -563446.78, -451298.30…
$ PROX_SHOPPING_MALL.1    <dbl> 96331.950, -552287.901, -739489.566, 172153.49…
$ PROX_SUPERMARKET.1      <dbl> -316646.820, -95812.393, 474130.208, -627179.9…
$ PROX_BUS_STOP.1         <dbl> 1270824.454, 2092787.882, 1235238.025, 8492969…
$ NO_Of_UNITS.1           <dbl> 192.115888, -109.054272, -15.449728, -75.20922…
$ FAMILY_FRIENDLY.1       <dbl> -47759.96, 300279.69, -54089.77, 1617558.95, 1…
$ FREEHOLD.1              <dbl> 357765.43, 537348.66, 79654.29, 977814.22, 309…
$ y                       <dbl> 3000000, 3880000, 3325000, 4250000, 1400000, 1…
$ yhat                    <dbl> 3000817.6, 3440670.4, 3550886.0, 5671634.3, 13…
$ residual                <dbl> -817.607, 439329.595, -225885.991, -1421634.32…
$ CV_Score                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Stud_residual           <dbl> -0.00398958, 1.11709144, -0.75098689, -3.51956…
$ Intercept_SE            <dbl> 450686.8, 598911.2, 968524.8, 500366.6, 337300…
$ AREA_SQM_SE             <dbl> 822.7387, 818.3671, 990.2089, 608.4580, 1360.8…
$ AGE_SE                  <dbl> 6123.568, 6091.919, 6284.537, 5912.757, 8069.9…
$ PROX_CBD_SE             <dbl> 35701.74, 29767.90, 57745.47, 405765.10, 63742…
$ PROX_CHILDCARE_SE       <dbl> 347907.5, 327242.7, 350890.5, 364739.8, 718039…
$ PROX_ELDERLYCARE_SE     <dbl> 192340.90, 95841.89, 147228.39, 158423.68, 403…
$ PROX_URA_GROWTH_AREA_SE <dbl> 55569.90, 72876.90, 100941.52, 414548.44, 6872…
$ PROX_HAWKER_MARKET_SE   <dbl> 183479.6, 115799.2, 171490.4, 215119.1, 237856…
$ PROX_KINDERGARTEN_SE    <dbl> 332018.6, 208196.6, 354059.4, 178900.4, 575983…
$ PROX_MRT_SE             <dbl> 200360.6, 264614.9, 282378.2, 291083.2, 400947…
$ PROX_PARK_SE            <dbl> 218006.3, 245460.0, 368800.2, 249147.3, 454475…
$ PROX_PRIMARY_SCH_SE     <dbl> 150239.8, 171107.7, 224293.3, 268702.4, 363219…
$ PROX_TOP_PRIMARY_SCH_SE <dbl> 90612.50, 116036.81, 169306.50, 123466.16, 282…
$ PROX_SHOPPING_MALL_SE   <dbl> 220250.2, 105003.4, 164026.2, 205600.1, 297965…
$ PROX_SUPERMARKET_SE     <dbl> 366145.2, 231139.1, 213020.8, 300174.0, 304082…
$ PROX_BUS_STOP_SE        <dbl> 642590.0, 410362.6, 468910.8, 610206.4, 768059…
$ NO_Of_UNITS_SE          <dbl> 238.9019, 207.9118, 214.4963, 353.7506, 329.74…
$ FAMILY_FRIENDLY_SE      <dbl> 133460.38, 110038.49, 154399.68, 107806.70, 15…
$ FREEHOLD_SE             <dbl> 111520.88, 131923.99, 145975.38, 134028.31, 21…
$ Intercept_TV            <dbl> 6.14328795, 7.34376865, 2.68988139, 0.16866270…
$ AREA_SQM_TV             <dbl> 11.269683, 19.813782, 12.873846, 34.296760, 4.…
$ AGE_TV                  <dbl> -1.8932674, -7.8557044, -3.9536972, -16.463780…
$ PROX_CBD_TV             <dbl> -4.48384369, -9.70599963, -4.20616167, -0.6946…
$ PROX_CHILDCARE_TV       <dbl> 0.21716410, 2.05051393, -1.68148738, 0.5071358…
$ PROX_ELDERLYCARE_TV     <dbl> -0.634065559, 6.296539138, 5.758010155, -0.398…
$ PROX_URA_GROWTH_AREA_TV <dbl> -3.32530713, -3.79126627, -0.24973626, -0.1363…
$ PROX_HAWKER_MARKET_TV   <dbl> 1.46550096, 5.15465184, 3.32176393, 5.86568583…
$ PROX_KINDERGARTEN_TV    <dbl> 0.37185225, -2.25993044, -0.19786046, -6.23293…
$ PROX_MRT_TV             <dbl> -1.80854965, -7.94250970, -4.11083538, -8.3256…
$ PROX_PARK_TV            <dbl> -1.5085553, 0.3293981, 0.8621258, 0.1907391, 0…
$ PROX_PRIMARY_SCH_TV     <dbl> 3.562042617, 7.773842102, 4.359594852, 7.01919…
$ PROX_TOP_PRIMARY_SCH_TV <dbl> -2.25986739, -8.14032946, -3.32796898, -3.6552…
$ PROX_SHOPPING_MALL_TV   <dbl> 0.43737500, -5.25971310, -4.50836228, 0.837321…
$ PROX_SUPERMARKET_TV     <dbl> -0.86481222, -0.41452261, 2.22574657, -2.08938…
$ PROX_BUS_STOP_TV        <dbl> 1.977659938, 5.099851002, 2.634270689, 13.9181…
$ NO_Of_UNITS_TV          <dbl> 0.80416237, -0.52452177, -0.07202795, -0.21260…
$ FAMILY_FRIENDLY_TV      <dbl> -0.3578587, 2.7288605, -0.3503231, 15.0042528,…
$ FREEHOLD_TV             <dbl> 3.2080579, 4.0731688, 0.5456693, 7.2955797, 1.…
$ Local_R2                <dbl> 0.9055980, 0.8912740, 0.9029717, 0.9167747, 0.…
$ coords.x1               <dbl> 22085.12, 25656.84, 23963.99, 27044.28, 41042.…
$ coords.x2               <dbl> 29951.54, 34546.20, 32890.80, 32319.77, 33743.…
$ geometry                <POINT [m]> POINT (22085.12 29951.54), POINT (25656.…
# Summary statistics of the values predicted by the adaptive GWR model.
gwr.adaptive$SDF$yhat %>%
  summary()
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
  398408  1098831  1377586  1753125  1984196 13871610 

Visualizing local R2

# Interactive map of Local_R2 at each condo point, drawn over the subzones.
tmap_mode("view")

tm_shape(mpsz) +
  tm_polygons(alpha = 0.1) +
  tm_shape(condo_resale.sf.adaptive) +
  tm_dots(
    col = "Local_R2",
    border.col = "gray60",
    border.lwd = 1
  ) +
  tm_view(set.zoom.limits = c(11, 14))

# Switch back to static plotting for the chunks that follow.
tmap_mode("plot")

Visualising coefficient estimates

By using sync = TRUE in tmap_arrange(), we can display two maps side by side with synchronous interactions.

tmap_mode("view")

# Build a dot map of one column of condo_resale.sf.adaptive over the subzone
# polygons; `field` is the column name passed to tm_dots(col = ...).
gwr_dot_map <- function(field) {
  tm_shape(mpsz) +
    tm_polygons(alpha = 0.1) +
    tm_shape(condo_resale.sf.adaptive) +
    tm_dots(
      col = field,
      border.col = "gray60",
      border.lwd = 1
    ) +
    tm_view(set.zoom.limits = c(11, 14))
}

# One map each for the AREA_SQM_SE and AREA_SQM_TV columns.
AREA_SQM_SE <- gwr_dot_map("AREA_SQM_SE")
AREA_SQM_TV <- gwr_dot_map("AREA_SQM_TV")

# Show both maps side by side with synchronised pan and zoom.
tmap_arrange(
  AREA_SQM_SE, AREA_SQM_TV,
  asp = 1,
  ncol = 2,
  sync = TRUE
)

tmap_mode("plot")

By URA Planning Region

# Local_R2 bubbles drawn over the subzones whose REGION_N is "CENTRAL REGION".
tm_shape(mpsz[mpsz$REGION_N == "CENTRAL REGION", ]) +
  tm_polygons() +
  tm_shape(condo_resale.sf.adaptive) +
  tm_bubbles(
    col = "Local_R2",
    size = 0.15,
    border.col = "gray60",
    border.lwd = 1
  )

tmap_mode("plot")